import re
import hashlib
import pefile
import io
import yara
from binascii import hexlify

from androguard.core.bytecodes import apk
from androguard.core.bytecodes import dvm

from androguard import misc

from malwareconfig.yarascanner import YaraScanner
from zipfile import ZipFile


class FileParser:
    def __init__(self, file_path=None, rawdata=None):
        if file_path:
            file_object = open(file_path, 'rb')
        else:
            file_object = io.BytesIO(rawdata)
            file_object.name = "DummyFile.ext"
        self.file_name = file_object.name
        self.file_data = file_object.read()
        self.file_size = len(self.file_data)
        self.malware_name = ''
        self.yarascan()
        file_object.close()

    def yarascan(self):
        """
        Scan the file contents with YaraScanner and record the first rule it
        reports in ``malware_name``. The attribute is left untouched when the
        scanner reports no rules.
        """
        yara_scanner = YaraScanner()
        yara_scanner.yara_scan(self.file_data)
        matches = yara_scanner.rule_list
        if len(matches) == 0:
            return
        self.malware_name = matches[0]

    def file_hash(self, hash_type="sha256"):
        filehash = ''
        if hash_type == "sha256":
            filehash = hashlib.sha256(self.file_data).hexdigest()
        if hash_type == 'md5':
            filehash = hashlib.md5(self.file_data).hexdigest()
        return filehash

    def pe_resource_id(self, res_id):
        """
        Read a PE resource by its ID or name using the Yara "pe" module.
        Useful where normal pe file fails.

        The extracted bytes are also stored on ``self.res_data``.

        :param res_id: resource id, or resource name (either as str or as the
                       raw name bytes reported by Yara)
        :return: bytes of the matching resource (the last match wins); b''
                 when nothing matches or the Yara "pe" module is unavailable
        """
        # Reset first so a failed lookup cannot return data left over from a
        # previous call (or raise AttributeError on the very first call).
        self.res_data = b''

        try:
            rules = yara.compile(source='import "pe" rule a { condition: false }')
        except yara.SyntaxError:
            print("Error using Yara PE did you enable it?")
            # Without compiled rules there is nothing to scan with.
            return self.res_data

        def modules_callback(data):
            for resource in data.get('resources', []):
                if 'id' in resource:
                    matched = resource['id'] == res_id
                elif 'name_string' in resource:
                    raw_name = resource['name_string']
                    # Remove null bytes for a better comparison. Undecodable
                    # bytes are replaced so one odd name cannot abort the scan.
                    res_name = raw_name.decode('UTF-8', errors='replace').replace('\x00', '')
                    # Check both unicode and plain str versions of name
                    matched = res_name == res_id or raw_name == res_id
                else:
                    matched = False

                if matched:
                    offset = resource['offset']
                    length = resource['length']
                    self.res_data = self.file_data[offset:offset + length]
            return yara.CALLBACK_CONTINUE

        rules.match(data=self.file_data, modules_callback=modules_callback)
        return self.res_data


    def pe_resource_names(self):
        """
        Read PE Resources and return a list of resource names

        :return: list of str; empty when the binary has no resource directory
        """
        pe = pefile.PE(data=self.file_data)
        # pefile only exposes DIRECTORY_ENTRY_RESOURCE when the binary has a
        # resource directory, so plain attribute access can raise.
        resource_root = getattr(pe, 'DIRECTORY_ENTRY_RESOURCE', None)
        if resource_root is None:
            return []

        resource_names = []
        for rsrc in resource_root.entries:
            # Entries that point straight at data have no sub-directory.
            directory = getattr(rsrc, 'directory', None)
            if directory is None:
                continue
            for entry in directory.entries:
                if entry.name is not None:
                    resource_names.append(entry.name.decode('utf-8'))
        return resource_names

    def pe_resource_by_name(self, resource_name):
        """
        Extract a PE Resource from a binary by name

        :param resource_name: str
        :return: bytes of the resource (the last entry with that name wins);
                 b'' when the name is not found or the binary has no
                 resource directory
        """
        offset = 0x00
        size = 0x00

        pe = pefile.PE(data=self.file_data)
        # pefile only exposes DIRECTORY_ENTRY_RESOURCE when the binary has a
        # resource directory, so plain attribute access can raise.
        resource_root = getattr(pe, 'DIRECTORY_ENTRY_RESOURCE', None)
        if resource_root is None:
            return b''

        for rsrc in resource_root.entries:
            # Entries that point straight at data have no sub-directory.
            directory = getattr(rsrc, 'directory', None)
            if directory is None:
                continue
            for entry in directory.entries:
                if entry.name is None or str(entry.name) != resource_name:
                    continue
                sub_directory = getattr(entry, 'directory', None)
                if sub_directory is None or not sub_directory.entries:
                    # Named entry without any data below it: nothing to read.
                    continue
                data_struct = sub_directory.entries[0].data.struct
                offset = data_struct.OffsetToData
                size = data_struct.Size

        # The slice is taken from the memory-mapped image, i.e. the recorded
        # offset is used as an index into the mapped layout, not the raw file.
        return pe.get_memory_mapped_image()[offset:offset + size]


    def dotnet_resource_names(self):
        """
        Read .NET Resources and return a list of resource names

        :return: list of resource names as reported by the Yara "dotnet"
                 module; empty when that module is unavailable
        """
        try:
            rules = yara.compile(source='import "dotnet" rule a { condition: false }')
        except yara.SyntaxError:
            print("Error using Yara DotNet did you enable it?")
            # Without compiled rules there is nothing to scan with.
            return []

        resource_list = []

        def modules_callback(data):
            resource_list.extend(
                resource['name'] for resource in data.get('resources', [])
            )
            return yara.CALLBACK_CONTINUE

        rules.match(data=self.file_data, modules_callback=modules_callback)
        return resource_list

    def dotnet_resource_by_name(self, resource_name):
        """
        Extract a .NET Resource by name

        The extracted bytes are also stored on ``self.res_data``.

        :param resource_name: name to compare against the 'name' value the
                              Yara "dotnet" module reports for each resource
        :return: bytes of the matching resource (the last match wins); b''
                 when nothing matches or the "dotnet" module is unavailable
        """
        # Reset first so a failed lookup cannot return data left over from a
        # previous call (or raise AttributeError on the very first call).
        self.res_data = b''

        try:
            rules = yara.compile(source='import "dotnet" rule a { condition: false }')
        except yara.SyntaxError:
            print("Error using Yara DotNet did you enable it?")
            # Without compiled rules there is nothing to scan with.
            return self.res_data

        def modules_callback(data):
            for resource in data.get('resources', []):
                if resource['name'] == resource_name:
                    offset = resource['offset']
                    length = resource['length']
                    self.res_data = self.file_data[offset:offset + length]
            return yara.CALLBACK_CONTINUE

        rules.match(data=self.file_data, modules_callback=modules_callback)
        return self.res_data

    def dotnet_guids(self):
        """
        Extract GUIDs from a .NET Binary

        :return: list of guid strings, with the typelib value (when the
                 module reports one) appended last; empty when the Yara
                 "dotnet" module is unavailable
        """
        try:
            rules = yara.compile(source='import "dotnet" rule a { condition: false }')
        except yara.SyntaxError:
            print("Error using Yara DotNet did you enable it?")
            # Without compiled rules there is nothing to scan with.
            return []

        guid_list = []

        def modules_callback(data):
            guid_list.extend(guid.decode('utf-8') for guid in data.get('guids', []))
            # Type lib is also valid as a GUID for nanocore so lets add that.
            # It can be absent from the module data, so guard before decoding.
            typelib = data.get('typelib')
            if typelib is not None:
                guid_list.append(typelib.decode('utf-8'))
            return yara.CALLBACK_CONTINUE

        rules.match(data=self.file_data, modules_callback=modules_callback)
        return guid_list

    def dotnet_user_strings(self):
        """
        Parse a list of User Strings from a .NET Binary file

        :return: list of strings; empty when the Yara "dotnet" module is
                 unavailable
        """
        try:
            rules = yara.compile(source='import "dotnet" rule a { condition: false }')
        except yara.SyntaxError:
            print("Error using Yara DotNet did you enable it?")
            # Without compiled rules there is nothing to scan with.
            return []

        user_strings = []

        def modules_callback(data):
            for userstring in data.get('user_strings', []):
                # Remove null bytes
                cleaned = userstring.replace(b'\x00', b'')
                try:
                    user_strings.append(cleaned.decode('utf-8'))
                except UnicodeDecodeError:
                    # Best effort: skip entries that are not valid UTF-8 once
                    # the null bytes are stripped.
                    continue
            return yara.CALLBACK_CONTINUE

        rules.match(data=self.file_data, modules_callback=modules_callback)
        return user_strings


    def ascii_strings(self, min_len=4):
        """
        Parse a list of ascii strings from a binary file

        A string is a run of at least ``min_len`` printable ASCII bytes
        (0x20-0x7e) or tabs.

        :param min_len: minimum run length for a match
        :return: list of bytes objects, in file order
        """
        # Raw bytes literal: the escapes are interpreted by the regex engine,
        # not by Python, which avoids invalid-escape warnings.
        regexp = rb'[\x20-\x7e\t]{%d,}' % min_len
        return [match.group() for match in re.finditer(regexp, self.file_data)]

    def unicode_strings(self, min_len=4):
        """
        Parse a list of wide (UTF-16LE style) strings from a binary file

        A string is a run of at least ``min_len`` characters, each being a
        printable ASCII byte (0x20-0x7e) or tab followed by a null byte.

        :param min_len: minimum number of characters for a match
        :return: list of bytes objects, each the raw match including the
                 interleaved null bytes, in file order
        """
        # The pattern must be bytes throughout: mixing a str character class
        # into a bytes template raises TypeError.
        regexp = rb'(?:[\x20-\x7e\t]\x00){%d,}' % min_len
        return [match.group() for match in re.finditer(regexp, self.file_data)]

    def file_from_zip(self, filename):
        new_zip = io.BytesIO(self.file_data)
        with ZipFile(new_zip, 'r') as open_zip:
            for name in open_zip.namelist():
                if name == filename:
                    zip_data = open_zip.read(name)
                    return zip_data

    def zip_namelist(self):
        new_zip = io.BytesIO(self.file_data)
        filelist = []
        with ZipFile(new_zip, 'r') as open_zip:
            for name in open_zip.namelist():
                filelist.append(name)
        return filelist


    def parse_apk(self):
        """
        Run androguard's AnalyzeAPK over the raw file contents.

        :return: tuple of the three objects AnalyzeAPK produces, in the
                 order it returns them
        """
        apk_obj, dex_objs, analysis = misc.AnalyzeAPK(self.file_data, raw=True)
        return (apk_obj, dex_objs, analysis)


    def elf_list_sections(self):
        """
        Read a list of sections from an elf binary

        :return: list of section names; empty when the Yara "elf" module is
                 unavailable
        """
        try:
            rules = yara.compile(source='import "elf" rule a { condition: false }')
        except yara.SyntaxError:
            print("Error using Yara ELF did you enable it?")
            # Without compiled rules there is nothing to scan with.
            return []

        section_names = []

        def modules_callback(data):
            section_names.extend(
                section['name'].decode('utf-8') for section in data.get('sections', [])
            )
            return yara.CALLBACK_CONTINUE

        rules.match(data=self.file_data, modules_callback=modules_callback)
        return section_names


    def elf_section_by_name(self, resource_name):
        """
        Extract an elf section by name

        The extracted bytes are also stored on ``self.res_data``.

        :param resource_name: section name as str
        :return: bytes of the matching section (the last match wins); b''
                 when nothing matches or the Yara "elf" module is unavailable
        """
        # Reset first so a failed lookup cannot return data left over from a
        # previous call (or raise AttributeError on the very first call).
        self.res_data = b''

        try:
            rules = yara.compile(source='import "elf" rule a { condition: false }')
        except yara.SyntaxError:
            print("Error using Yara ELF did you enable it?")
            # Without compiled rules there is nothing to scan with.
            return self.res_data

        def modules_callback(data):
            for section in data.get('sections', []):
                if section['name'].decode('utf-8') == resource_name:
                    offset = section['offset']
                    length = section['size']
                    self.res_data = self.file_data[offset:offset + length]
            return yara.CALLBACK_CONTINUE

        rules.match(data=self.file_data, modules_callback=modules_callback)
        return self.res_data